#include <stdlib.h>
#include <stdio.h>
#include <cuda.h>
#include <math.h>
#include <time.h>
#define TRIALS_PER_THREAD 4096
#define BLOCKS 512
#define THREADS 512
#define PI 3.1415926535 // known value of pi

__device__ float my_rand(unsigned int *seed) {
unsigned long a = 16807; // constants for random number generator
unsigned long m = 2147483647;
// 2^31 - 1
unsigned long x = (unsigned long) *seed;
x = (a * x)%m;
*seed = (unsigned int) x;
return ((float)x)/m;
}
__global__ void gpu_monte_carlo(float *estimate) {
unsigned int tid = threadIdx.x + blockDim.x * blockIdx.x;
int points_in_circle = 0;
float x, y;
unsigned int seed = tid + 1;
// starting number in random sequence
for(int i = 0; i < TRIALS_PER_THREAD; i++) {
x = my_rand(&seed);
y = my_rand(&seed);
points_in_circle += (x*x + y*y <= 1.0f); // count if x & y is in the circle.
}
estimate[tid] = 4.0f * points_in_circle / (float) TRIALS_PER_THREAD;
}
float host_monte_carlo(long trials) {
float x, y;
long points_in_circle;
for(long i = 0; i < trials; i++) {
x = rand() / (float) RAND_MAX;
y = rand() / (float) RAND_MAX;
points_in_circle += (x*x + y*y <= 1.0f);
}
return 4.0f * points_in_circle / trials;
}
int main (int argc, char *argv[]) {
clock_t start, stop;
float host[BLOCKS * THREADS];
float *dev;
printf("# of trials per thread = %d, # of blocks = %d, # of threads/block = %d.\n",
TRIALS_PER_THREAD, BLOCKS, THREADS);
start = clock();
cudaMalloc((void **) &dev, BLOCKS * THREADS * sizeof(float));
gpu_monte_carlo<<<BLOCKS, THREADS>>>(dev);
cudaMemcpy(host, dev, BLOCKS * THREADS * sizeof(float), cudaMemcpyDeviceToHost);
float pi_gpu;
for(int i = 0; i < BLOCKS * THREADS; i++) {
pi_gpu += host[i];
}
pi_gpu /= (BLOCKS * THREADS);
stop = clock();
printf("GPU pi calculated in %f s.\n", (stop-start)/(float)CLOCKS_PER_SEC);
start = clock();
float pi_cpu = host_monte_carlo(BLOCKS * THREADS * TRIALS_PER_THREAD);
stop = clock();
printf("CPU pi calculated in %f s.\n", (stop-start)/(float)CLOCKS_PER_SEC);
printf("CUDA estimate of PI = %f [error of %f]\n", pi_gpu, pi_gpu - PI);
printf("CPU estimate of PI = %f [error of %f]\n", pi_cpu, pi_cpu - PI);
return 0;
}

